import scrapy


class DbGroupSpider(scrapy.Spider):
    """Crawl the Douban group "explore" listing.

    Yields one dict per discussion entry (likes count, title, group name)
    and follows the pagination link until no next page exists.
    """

    name = 'db_group'
    allowed_domains = ['douban.com']
    start_urls = ['https://www.douban.com/group/explore?start=1']

    def parse(self, response):
        """Parse one listing page.

        Args:
            response: the downloaded listing page.

        Yields:
            dict: keys 喜欢 (likes), 标题 (title), 小组 (group name);
                a value is None when the corresponding node is missing.
            scrapy.Request: request for the next listing page, if any.
        """
        # Each entry sits in its own <div> under the article container.
        div_list = response.xpath('//div[@class="article"]/div[1]/div')
        for div in div_list:
            item = {}
            # .get() returns None instead of raising IndexError when the
            # XPath matches nothing (the old [0].extract() crashed the
            # whole callback on a single malformed entry).
            item["喜欢"] = div.xpath("./div[1]/text()").get()
            item["标题"] = div.xpath("./div[2]/h3//text()").get()
            # The group name is the second text node inside the span;
            # guard against entries that expose fewer text nodes.
            group_texts = div.xpath("./div[2]/div[2]/span//text()").getall()
            item["小组"] = group_texts[1] if len(group_texts) > 1 else None
            yield item

        # Follow the next-page link, if present.
        next_url = response.xpath("//span[@class='next']/a/@href").get()
        if next_url:
            # urljoin resolves relative hrefs against the current page URL,
            # which is safer than concatenating onto a hard-coded prefix.
            yield scrapy.Request(
                response.urljoin(next_url),
                callback=self.parse
            )